Broadcast Analysis

Mapped by core

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
# Load the broadcast latency measurements for the "mapped by core" runs.
bcast_core_thin <- read.csv("Downloads/hpc_ex1/thin/bcast_core_thin.csv")

# Latency of each broadcast algorithm versus core count (even counts only).
bcast_core_plot <-
  bcast_core_thin %>%
  filter(cores %% 2 == 0) %>%
  # Long format: the algorithm column name goes to `name`, its latency to
  # `value` (the pivot_longer() defaults), so one aesthetic colours all three.
  pivot_longer(cols = c(basic_linear, chain, binary_tree)) %>%
  ggplot(aes(x = cores, y = value, color = name)) +
  # Dashed guides at 12 and 24 cores, each labelled at y = 0.
  # NOTE(review): presumably the socket / node boundaries - confirm.
  geom_vline(xintercept = c(12, 24), linetype = "dashed", color = "black") +
  annotate("text", x = c(12, 24), y = rep(0, 2), label = c(12, 24),
           vjust = 1, hjust = -0.1, color = "black") +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 0.6) +
  geom_point(size = 1.5) +
  labs(title = "Latency vs # Cores, Mapped by Core, Message Size 1 MPI_CHAR",
       x = "# Cores",
       y = "Latency (us)",
       color = "Algorithm") +
  theme_bw() +
  # Legend inside the panel (upper left) on a transparent background.
  theme(legend.position = c(0.1, 0.82),
        legend.background = element_rect(fill = "transparent", colour = NA))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Auto-print the plot object so that it is rendered.
bcast_core_plot

Mapped by socket

# Load the broadcast latency measurements for the "mapped by socket" runs.
bcast_socket_thin <- read.csv("Downloads/hpc_ex1/thin/bcast_socket_thin.csv")

# Latency of each broadcast algorithm versus core count (even counts only).
bcast_socket_plot <-
  bcast_socket_thin %>%
  filter(cores %% 2 == 0) %>%
  # Long format: the algorithm column name goes to `name`, its latency to
  # `value` (the pivot_longer() defaults), so one aesthetic colours all three.
  pivot_longer(cols = c(basic_linear, chain, binary_tree)) %>%
  ggplot(aes(x = cores, y = value, color = name)) +
  # Dashed guides at 12 and 24 cores, each labelled at y = 0.
  # NOTE(review): presumably the socket / node boundaries - confirm.
  geom_vline(xintercept = c(12, 24), linetype = "dashed", color = "black") +
  annotate("text", x = c(12, 24), y = rep(0, 2), label = c(12, 24),
           vjust = 1, hjust = -0.1, color = "black") +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 0.6) +
  geom_point(size = 1.5) +
  labs(title = "Latency vs # Cores, Mapped by Socket, Message Size 1 MPI_CHAR",
       x = "# Cores",
       y = "Latency (us)",
       color = "Algorithm") +
  theme_bw() +
  # Legend inside the panel (upper left) on a transparent background.
  theme(legend.position = c(0.1, 0.82),
        legend.background = element_rect(fill = "transparent", colour = NA))

# Auto-print the plot object so that it is rendered.
bcast_socket_plot

Mapped by node

# Load the broadcast latency measurements for the "mapped by node" runs.
bcast_node_thin <- read.csv("Downloads/hpc_ex1/thin/bcast_node_thin.csv")

# Latency of each broadcast algorithm versus core count (even counts only).
bcast_node_plot <-
  bcast_node_thin %>%
  filter(cores %% 2 == 0) %>%
  # Long format: the algorithm column name goes to `name`, its latency to
  # `value` (the pivot_longer() defaults), so one aesthetic colours all three.
  pivot_longer(cols = c(basic_linear, chain, binary_tree)) %>%
  ggplot(aes(x = cores, y = value, color = name)) +
  # Dashed guides at 12 and 24 cores, each labelled at y = 0.
  # NOTE(review): presumably the socket / node boundaries - confirm.
  geom_vline(xintercept = c(12, 24), linetype = "dashed", color = "black") +
  annotate("text", x = c(12, 24), y = rep(0, 2), label = c(12, 24),
           vjust = 1, hjust = -0.1, color = "black") +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 0.6) +
  geom_point(size = 1.5) +
  labs(title = "Latency vs # Cores, Mapped by Node, Message Size 1 MPI_CHAR",
       x = "# Cores",
       y = "Latency (us)",
       color = "Algorithm") +
  theme_bw() +
  # Legend inside the panel (upper left) on a transparent background.
  theme(legend.position = c(0.1, 0.82),
        legend.background = element_rect(fill = "transparent", colour = NA))

# Auto-print the plot object so that it is rendered.
bcast_node_plot

Algorithms comparison

# Regroup the measurements per algorithm: one latency column per mapping
# policy (core / socket / node) plus the core counts of the by-core run.
# NOTE(review): assumes the three CSVs list the same `cores` values in the
# same row order - confirm.
linear <- data.frame(
  core = bcast_core_thin$basic_linear,
  socket = bcast_socket_thin$basic_linear,
  node = bcast_node_thin$basic_linear,
  cores = bcast_core_thin$cores
)

chain <- data.frame(
  core = bcast_core_thin$chain,
  socket = bcast_socket_thin$chain,
  node = bcast_node_thin$chain,
  cores = bcast_core_thin$cores
)

binary <- data.frame(
  core = bcast_core_thin$binary_tree,
  socket = bcast_socket_thin$binary_tree,
  node = bcast_node_thin$binary_tree,
  cores = bcast_core_thin$cores
)
  
# Linear algorithm: latency versus core count, one series per mapping policy
# (even core counts only).
linear_plot <-
  linear %>%
  filter(cores %% 2 == 0) %>%
  # Long format: the policy column name goes to `name`, its latency to `value`.
  pivot_longer(cols = c(core, socket, node)) %>%
  ggplot(aes(x = cores, y = value, color = name)) +
  # Dashed guides at 12 and 24 cores, each labelled at y = 0.
  # NOTE(review): presumably the socket / node boundaries - confirm.
  geom_vline(xintercept = c(12, 24), linetype = "dashed", color = "black") +
  annotate("text", x = c(12, 24), y = rep(0, 2), label = c(12, 24),
           vjust = 1, hjust = -0.1, color = "black") +
  geom_point(size = 1.5) +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(aes(group = name), linewidth = 0.6) +
  labs(title = "Linear Algorithm, Latency vs # Cores",
       x = "# Cores",
       y = "Latency (us)",
       color = "Allocation") +
  theme_bw() +
  # Legend inside the panel (upper left) on a transparent background.
  theme(legend.position = c(0.1, 0.82),
        legend.background = element_rect(fill = "transparent", colour = NA))


# Chain algorithm: latency versus core count, one series per mapping policy
# (even core counts only).
chain_plot <-
  chain %>%
  filter(cores %% 2 == 0) %>%
  # Long format: the policy column name goes to `name`, its latency to `value`.
  pivot_longer(cols = c(core, socket, node)) %>%
  ggplot(aes(x = cores, y = value, color = name)) +
  # Dashed guides at 12 and 24 cores, each labelled at y = 0.
  # NOTE(review): presumably the socket / node boundaries - confirm.
  geom_vline(xintercept = c(12, 24), linetype = "dashed", color = "black") +
  annotate("text", x = c(12, 24), y = rep(0, 2), label = c(12, 24),
           vjust = 1, hjust = -0.1, color = "black") +
  geom_point(size = 1.5) +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(aes(group = name), linewidth = 0.6) +
  labs(title = "Chain Tree Algorithm, Latency vs # Cores",
       x = "# Cores",
       y = "Latency (us)",
       color = "Allocation") +
  theme_bw() +
  # Legend inside the panel (upper left) on a transparent background.
  theme(legend.position = c(0.1, 0.82),
        legend.background = element_rect(fill = "transparent", colour = NA))


# Binary tree algorithm: latency versus core count, one series per mapping
# policy (even core counts only).
binary_plot <-
  binary %>%
  filter(cores %% 2 == 0) %>%
  # Long format: the policy column name goes to `name`, its latency to `value`.
  pivot_longer(cols = c(core, socket, node)) %>%
  ggplot(aes(x = cores, y = value, color = name)) +
  # Dashed guides at 12 and 24 cores, each labelled at y = 0.
  # NOTE(review): presumably the socket / node boundaries - confirm.
  geom_vline(xintercept = c(12, 24), linetype = "dashed", color = "black") +
  annotate("text", x = c(12, 24), y = rep(0, 2), label = c(12, 24),
           vjust = 1, hjust = -0.1, color = "black") +
  geom_point(size = 1.5) +
  # `linewidth` replaces the `size` aesthetic for lines, which has been
  # deprecated since ggplot2 3.4.0.
  geom_line(aes(group = name), linewidth = 0.6) +
  labs(title = "Binary Tree Algorithm, Latency vs # Cores",
       x = "# Cores",
       y = "Latency (us)",
       color = "Allocation") +
  theme_bw() +
  # Legend inside the panel (upper left) on a transparent background.
  theme(legend.position = c(0.1, 0.82),
        legend.background = element_rect(fill = "transparent", colour = NA))

# Auto-print the three per-algorithm plot objects so that they are rendered.
linear_plot

chain_plot

binary_plot

Broadcast Performance Model

Linear Algorithm

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Keep only the rows of the linear-broadcast benchmark whose Allocation is
# "core" and put latency and message size on a log2 scale. Note that this
# replaces the `linear` data frame built for the comparison plots.
linear <- read.csv("Downloads/hpc_ex1/thin/bcast_linear.csv") %>%
  filter(Allocation == "core") %>%
  transmute(
    log2Latency = as.numeric(log2(Latency)),
    Processes = as.numeric(Processes),
    log2MessageSize = as.numeric(log2(MessageSize))
  )



# Interactive 3D scatter of the linear-broadcast measurements; markers are
# coloured by log2 latency on the Viridis scale.
fig <- plot_ly(
  linear,
  x = ~Processes,
  y = ~log2MessageSize,
  z = ~log2Latency,
  marker = list(
    size = 8,
    color = ~log2Latency,
    colorscale = "Viridis",
    line = list(color = "black", width = 2)
  )
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "Number of Cores"),
      yaxis = list(title = "log2(MessageSize)"),
      zaxis = list(title = "log2(Latency)")
    ),
    title = "3D Plot of Latency by Number of Cores and Message Size"
  )

fig
# Muted grey 3D scatter of the same measurements; it serves as the backdrop
# for the fitted surface that is added to `fig` further down.
fig <- plot_ly(
  linear,
  x = ~log2MessageSize,
  y = ~Processes,
  z = ~log2Latency,
  type = "scatter3d",
  mode = "markers",
  marker = list(
    size = 6,
    color = "grey",
    opacity = 0.3,
    line = list(color = "black", width = 0.5)
  )
)

# Untitled figure; axis titles set at font size 11.
fig <- layout(
  fig,
  title = "",
  scene = list(
    xaxis = list(title = "log2(MessageSize)", titlefont = list(size = 11)),
    yaxis = list(title = "Cores", titlefont = list(size = 11)),
    zaxis = list(title = "log2(Latency)", titlefont = list(size = 11))
  )
)

# Build a 100 x 100 evaluation grid spanning the observed predictor ranges.
x_values <- with(
  linear,
  seq(min(log2MessageSize), max(log2MessageSize), length.out = 100)
)
y_values <- with(
  linear,
  seq(min(Processes), max(Processes), length.out = 100)
)
meshgrid <- expand.grid(log2MessageSize = x_values, Processes = y_values)

# Fit log2 latency without an intercept (-1): linear in the process count,
# quadratic in log2 message size. For a no-intercept model summary() computes
# R-squared against zero rather than the response mean, so it is not directly
# comparable with the R-squared of a model that has an intercept.
model <- lm(
  log2Latency ~ -1 + Processes + log2MessageSize + I(log2MessageSize^2),
  data = linear
)
summary(model)
## 
## Call:
## lm(formula = log2Latency ~ -1 + Processes + log2MessageSize + 
##     I(log2MessageSize^2), data = linear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60325 -0.44425 -0.00247  0.42983  1.20675 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## Processes             0.0703302  0.0013204   53.26   <2e-16 ***
## log2MessageSize      -0.2895294  0.0098068  -29.52   <2e-16 ***
## I(log2MessageSize^2)  0.0350895  0.0005347   65.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6785 on 984 degrees of freedom
## Multiple R-squared:  0.9794, Adjusted R-squared:  0.9794 
## F-statistic: 1.561e+04 on 3 and 984 DF,  p-value: < 2.2e-16
# Evaluate the fitted model at every grid point.
z_values <- predict(model, newdata = meshgrid)

# expand.grid() varies its first factor (log2MessageSize) fastest, so filling
# by row gives z_matrix[i, j] = prediction at (x_values[j], y_values[i]):
# rows follow y and columns follow x, the orientation used for the surface.
z_matrix <- matrix(z_values, nrow = length(y_values), ncol = length(x_values), byrow = TRUE)

# Overlay the fitted surface on the scatter plot.
fig <- fig %>%
  add_surface(
    x = x_values,
    y = y_values,
    z = z_matrix,
    opacity = 1,
    name = 'Regression Plane',
    showlegend = FALSE,
    showscale = FALSE,  # hide the colour bar of the surface
    # Do not inherit the scatter trace's `mode` / `marker` attributes from
    # plot_ly(); surface traces do not support them and plotly warns
    # otherwise ("'surface' objects don't have these attributes").
    inherit = FALSE
  )

fig
## Warning: 'surface' objects don't have these attributes: 'mode', 'marker'
## Valid attributes include:
## '_deprecated', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'coloraxis', 'colorbar', 'colorscale', 'connectgaps', 'contours', 'customdata', 'customdatasrc', 'hidesurface', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'opacityscale', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'surfacecolor', 'surfacecolorsrc', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

Chain Algorithm

# Keep only the rows of the chain-broadcast benchmark whose Allocation is
# "core" and put latency and message size on a log2 scale. Note that this
# replaces the `chain` data frame built for the comparison plots.
chain <- read.csv("Downloads/hpc_ex1/thin/bcast_chain.csv") %>%
  filter(Allocation == "core") %>%
  transmute(
    log2Latency = as.numeric(log2(Latency)),
    Processes = as.numeric(Processes),
    log2MessageSize = as.numeric(log2(MessageSize))
  )


# Interactive 3D scatter of the chain-broadcast measurements; markers are
# coloured by log2 latency on the Viridis scale.
fig <- plot_ly(
  chain,
  x = ~log2MessageSize,
  y = ~Processes,
  z = ~log2Latency,
  type = "scatter3d",
  mode = "markers",
  marker = list(
    size = 6,
    colorscale = "Viridis",
    color = ~log2Latency,
    line = list(color = "black", width = 0.5)
  )
)

# Untitled figure; axis titles set at font size 11.
fig <- layout(
  fig,
  title = "",
  scene = list(
    xaxis = list(title = "log2(MessageSize)", titlefont = list(size = 11)),
    yaxis = list(title = "Cores", titlefont = list(size = 11)),
    zaxis = list(title = "log2(Latency)", titlefont = list(size = 11))
  )
)

fig
# Fit log2 latency without an intercept (-1): linear in the process count,
# quadratic in log2 message size. For a no-intercept model summary() computes
# R-squared against zero rather than the response mean.
model_chain <- lm(
  log2Latency ~ -1 + Processes + log2MessageSize + I(log2MessageSize^2),
  data = chain
)
summary(model_chain)
## 
## Call:
## lm(formula = log2Latency ~ -1 + Processes + log2MessageSize + 
##     I(log2MessageSize^2), data = chain)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43839 -0.32658  0.00792  0.34625  1.16366 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## Processes             0.0634918  0.0011072   57.34   <2e-16 ***
## log2MessageSize      -0.3145670  0.0082232  -38.25   <2e-16 ***
## I(log2MessageSize^2)  0.0357085  0.0004484   79.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5689 on 984 degrees of freedom
## Multiple R-squared:  0.9833, Adjusted R-squared:  0.9832 
## F-statistic: 1.93e+04 on 3 and 984 DF,  p-value: < 2.2e-16

Binary Tree Algorithm

# Keep only the rows of the binary-tree-broadcast benchmark whose Allocation
# is "core" and put latency and message size on a log2 scale. Note that this
# replaces the `binary` data frame built for the comparison plots.
binary <- read.csv("Downloads/hpc_ex1/thin/bcast_binary.csv") %>%
  filter(Allocation == "core") %>%
  transmute(
    log2Latency = as.numeric(log2(Latency)),
    Processes = as.numeric(Processes),
    log2MessageSize = as.numeric(log2(MessageSize))
  )


# Interactive 3D scatter of the binary-tree-broadcast measurements; markers
# are coloured by log2 latency on the Viridis scale.
fig <- plot_ly(
  binary,
  x = ~log2MessageSize,
  y = ~Processes,
  z = ~log2Latency,
  type = "scatter3d",
  mode = "markers",
  marker = list(
    size = 6,
    colorscale = "Viridis",
    color = ~log2Latency,
    line = list(color = "black", width = 0.5)
  )
)

# Untitled figure; axis titles set at font size 11.
fig <- layout(
  fig,
  title = "",
  scene = list(
    xaxis = list(title = "log2(MessageSize)", titlefont = list(size = 11)),
    yaxis = list(title = "Cores", titlefont = list(size = 11)),
    zaxis = list(title = "log2(Latency)", titlefont = list(size = 11))
  )
)

fig
# Fit log2 latency without an intercept (-1): linear in the process count,
# quadratic in log2 message size. For a no-intercept model summary() computes
# R-squared against zero rather than the response mean.
model_binary <- lm(
  log2Latency ~ -1 + Processes + log2MessageSize + I(log2MessageSize^2),
  data = binary
)
summary(model_binary)
## 
## Call:
## lm(formula = log2Latency ~ -1 + Processes + log2MessageSize + 
##     I(log2MessageSize^2), data = binary)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.41313 -0.26496 -0.00981  0.24957  1.10428 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## Processes             0.0592922  0.0009693   61.17   <2e-16 ***
## log2MessageSize      -0.3221704  0.0071993  -44.75   <2e-16 ***
## I(log2MessageSize^2)  0.0366723  0.0003925   93.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4981 on 984 degrees of freedom
## Multiple R-squared:  0.9871, Adjusted R-squared:  0.9871 
## F-statistic: 2.519e+04 on 3 and 984 DF,  p-value: < 2.2e-16